Transpose

对输入数组按照指定维度顺序(perm)进行转置操作,并输出结果数组。

输入:
  • in_data - 输入数据地址。

  • num_axes - 数据维度数。

  • output_shape - 输出形状数组。

  • perm - 转置维度顺序数组。

  • strides - 输入数据每维步长。

  • out_strides - 输出数据每维步长。

  • core_mask - 核掩码(仅适用于共享存储版本)。

输出:
  • out_data - 转置结果地址。

支持平台:

FT78NE MT7004

备注:

  • FT78NE 支持fp, dp, int8, int16, int32, cplx64, cplx128

  • MT7004 支持hp, fp, i16, i32, cplx64

共享存储版本:

void fp_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const float *in_data, float *out_data, int core_mask)
void hp_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const half *in_data, half *out_data, int core_mask)
void dp_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const double *in_data, double *out_data, int core_mask)
void i8_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int8_t *in_data, int8_t *out_data, int core_mask)
void i16_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int16_t *in_data, int16_t *out_data, int core_mask)
void i32_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int *in_data, int *out_data, int core_mask)
void c64_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const float *in_data, float *out_data, int core_mask)
void c128_transpose_s(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const double *in_data, double *out_data, int core_mask)

C调用示例:

#include <stdio.h>
#include <transpose.h>

int main() {
    float *input = (float *)0xA0000000;      // 输入在DDR空间
    float *output = (float *)0xC0000000;
    int num_axes = 4;
    int output_shape[4] = {1, 3, 224, 224};
    int perm[4] = {0, 2, 3, 1};
    int strides[4] = {150528, 50176, 224, 1};
    int out_strides[4] = {150528, 50176, 224, 1};
    int core_mask = 0xff;

    fp_transpose_s(num_axes, output_shape, perm, strides, out_strides, input, output, core_mask);
    return 0;
}

私有存储版本:

void fp_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const float *in_data, float *out_data)
void hp_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const half *in_data, half *out_data)
void dp_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const double *in_data, double *out_data)
void i8_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int8_t *in_data, int8_t *out_data)
void i16_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int16_t *in_data, int16_t *out_data)
void i32_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const int *in_data, int *out_data)
void c64_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const float *in_data, float *out_data)
void c128_transpose_p(int num_axes, const int *output_shape, int *perm, int *strides, int *out_strides, const double *in_data, double *out_data)

C调用示例:

#include <stdio.h>
#include <transpose.h>

int main() {
    float *input = (float *)0x10810000;      // 输入在L2空间
    float *output = (float *)0x10820000;
    int num_axes = 4;
    int output_shape[4] = {1, 3, 224, 224};
    int perm[4] = {0, 2, 3, 1};
    int strides[4] = {150528, 50176, 224, 1};
    int out_strides[4] = {150528, 50176, 224, 1};

    fp_transpose_p(num_axes, output_shape, perm, strides, out_strides, input, output);
    return 0;
}